CISC3024 Pattern Recognition Final Project¶

Group Members:¶

  • Huang Yanzhen, DC126732
  • Mai Jiajun, DC127853

0. Project Setup¶

0.1 Packages & Device¶

In [1]:
# Torch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from tqdm import tqdm

# Augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2
import scipy.io as sio

# Visualize Result
from sklearn.metrics import (confusion_matrix, accuracy_score,
                            precision_score, recall_score,
                            f1_score, roc_auc_score,
                            roc_curve, auc, precision_recall_curve,
                            average_precision_score)
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import label_binarize

# Basic
import numpy as np
import cv2
import os
import time
from typing import List, Tuple, Union
import random
import itertools
import copy
E:\Courses\CISC3024-Pattern-Recognition\cisc3024_pr_venv\lib\site-packages\albumentations\__init__.py:13: UserWarning: A new version of Albumentations is available: 1.4.20 (you have 1.4.18). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1.
  check_for_updates()
In [2]:
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device_name}")
Using device: cuda

0.2 Global Configurations¶

In [3]:
path_dataset = "./data/SVHN_mat"

1. Data Processing and Augmentation¶

1.1 Download Datasets¶

Define dataset class, retrieve dataset.

1.1 Notes¶

In [ ]:
_dat = sio.loadmat(os.path.join(path_dataset, "train_32x32.mat"))
# _dat['X'][0][0][0]
# np.array(_dat).shape
dat = np.transpose(_dat['X'], (3, 0, 1, 2))
# dat = dat.astype(np.float32)
dat.shape
In [ ]:
# First Image
dat[0].shape
In [ ]:
# First Row of Image
dat[0][0]
In [ ]:
# First Pixel of Image
dat[0][0][0]
In [ ]:
_transform = A.Compose([
    A.Normalize(mean=[0.4376845359802246, 0.4437684714794159, 0.47280389070510864], std=[0.19803018867969513, 0.2010156661272049, 0.19703581929206848]),
    ToTensorV2()
])

_img = dat[0]
_img = _transform(image=_img)['image']
# print(_img)
_img.shape

1.2 Dataset¶

In [140]:
class SVHNDataset(Dataset):
    def __init__(self, mat_file, transform=None):
        data = sio.loadmat(mat_file)
        
        self.images = np.transpose(data['X'], (3, 0, 1, 2))
        self.labels = data['y'].flatten()
        self.labels[self.labels == 10] = 0
        self.transform = transform        # Allow postponed injection of transform.

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # There should always be a transform. 
        # It converts image to float, and permutes it from (32, 32, 3) to Tensor([3, 32, 32]).
        # ...which is important!!
        if self.transform is None:        
            raise ValueError("CISC3024 Custom Error: The transform should not be None when this object is passed into a DataLoader.")

        image = self.transform(image=image)['image']
        return image, label
    
    def get_meanstd(self, bias=None):
        if bias is not None:
            random_bias = random.randint(0, bias)
            images_ = []
            for i in range(len(self.images)):
                image = self.images[i]
                image = image.astype(np.int16)
                image = (image + random_bias) % 256
                image = image.astype(np.uint8)
                images_.append(image)
            images_ = np.array(images_)
        else:
            images_ = self.images
                
        images_ = images_.astype(np.float32) / 255.0
        mean = np.mean(images_, axis=(0,1,2))
        std = np.std(images_, axis=(0,1,2), ddof=0)
        
        return mean.tolist(), std.tolist()        
    
    def overwrite(self, indices:Union[list, np.ndarray]):
        """
        Create a deep copy of the mother dataset instance and only keep the wanted
        data samples, controlled by indices.
        """
        if any(index < 0 or index >= len(self.labels) for index in indices):
            raise IndexError("CISC3024 Custom Error: One or more indices are out of bounds.")
        
        new_dataset = copy.deepcopy(self)
        new_dataset.images = self.images[indices]
        new_dataset.labels = self.labels[indices]
        return new_dataset

1.2 Peak A Data¶

In [5]:
def peek(dataset):
    def unnormalize(img, mean, std):
        """Revert the normalization for visualization."""
        img = img * std + mean
        return np.clip(img, 0, 1)

    # Plotting multiple images in a grid
    grid_rows, grid_cols = 1, 6
    
    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(6, 6))
    
    peek_index = random.randint(0, dataset.__len__()-1)
    
    for i in range(grid_cols):
        img_tensor, label = dataset.__getitem__(peek_index)
        img = img_tensor.permute(1, 2, 0).numpy()  # Convert to (H, W, C)
        img = unnormalize(img, norm_mean, norm_std)
    
        ax = axes[i]  # Get subplot axis
        ax.imshow(img)
        ax.set_title(f"Label: {label}")
    
    plt.tight_layout()
    plt.show()
    print(f"Peaking data from training set of index {peak_index}.\nImage Tnesor Size:{train_dataset.__getitem__(peak_index)[0].shape}")

2. Neural Network¶

2.1 Model Structure¶

In [5]:
class SmallVGG(nn.Module):
    def __init__(self, frame_size=32):
        super(SmallVGG, self).__init__()
        self.frame_size = frame_size
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 16x16

            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 8x8

            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 4x4
        )
    
        self.fc_layers = nn.Sequential(
            nn.Linear(frame_size * 4 * 4, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

2.2 Train and Evaluate Function¶

In [6]:
def train_and_evaluate(model,
                      train_loader,
                      valid_loader,
                      criterion,
                      optimizer,
                      num_epochs=100):
    # Record Losses to plot
    train_losses = []
    valid_losses = []

    for epoch in range(num_epochs):
        # Train
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * len(images)
        train_losses.append(running_loss / len(train_loader))

        # Evaluate
        model.eval()
        valid_loss = 0.0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * len(images)

        valid_losses.append(valid_loss / len(valid_loader))
        print(f"Epoch[{epoch+1}/{num_epochs}], Train Loss:{train_losses[-1]:.4f}, Validation Loss:{valid_losses[-1]:.4f}")

    return train_losses, valid_losses

2.3 Get Predictions¶

Multiple functions are defined to evaluate data. Below is a list of them.

In [7]:
def get_predictions(model_path, extra_loader):
    if not isinstance(model_path, str):
        model_state = model_path
    else:
        model_state = torch.load(model_path)
    model = SmallVGG()
    model.load_state_dict(model_state)
    
    model.to(device)
    model.eval() 
    
    pred_scores = []  # Prob. of predictions
    true_labels = []  # Ground Truth
    pred_labels = []  # Label of prediction, i.e., argmax(softmax(pred_scores))
    
    with torch.no_grad():
        for images, labels in tqdm(extra_loader):
            images, labels = images.to(device), labels.to(device)
        
            outputs = model(images)
    
            pred_scores_batch = nn.functional.softmax(outputs, dim=-1)
    
            pred_scores.extend(pred_scores_batch.cpu().tolist())
            pred_labels.extend(outputs.argmax(dim=1).tolist())
            true_labels.extend(labels.cpu().tolist())
            
    return pred_scores, true_labels, pred_labels

2.4 Get Metrics¶

In [8]:
def get_metrics(true_labels, pred_labels):
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, zero_division=1, average=None, labels=range(0,10))
    recall = recall_score(true_labels, pred_labels, zero_division=1, average=None, labels=range(0,10))
    f1 = f1_score(true_labels, pred_labels, zero_division=0, average=None, labels=range(0,10))

    return accuracy, precision, recall, f1
In [9]:
def print_metrics(accuracies, f1s):
    print(f"Accuracies:")
    for acc in accuracies:
        print(f"{acc:.3f}", end=" ")
    print("\n")
    
    print(f"F1 Score Lists:")
    mean_f1s = []
    for f1 in f1s:
        for val in f1:
            print(f"{val:.3f}", end=" ")
        mean_f1 = np.mean(f1)
        std_f1 = np.std(f1)
        mean_f1s.append(mean_f1)
        print(f"| Avg F1={mean_f1:.3f}, Std F1={std_f1}")
    print(f"Best: {np.argmax(mean_f1s)+1}-th")
In [11]:
# Compute ROC AUC for each class
def get_roc_auc(true_labels_bin, pred_labels_bin):
    roc_auc = dict()
    for i in range(0, 10):
        roc_auc[i] = roc_auc_score(true_labels_bin[:,i], np.array(pred_scores)[:, i])
    return roc_auc

3. Experiments¶

3.0 Preparation¶

3.0.1 Plot Functions¶

The experiments will be a list of the following structures:

{
    "HYPER_PARAM_1": combo[0],
    "HYPER_PARAM_2": combo[1],
    "train_losses": train_losses,
    "valid_losses": valid_losses,
    "model_state_dict": exp_model.state_dict()
}

Epoch-Loss Curves¶

In [10]:
def plot_el(loaded_experiments, hyper_param_names, n_rows=4, n_cols=4):
    fig_size = (n_cols * 5, n_rows * 5)
    n1, n2 = hyper_param_names
    
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=fig_size)
    # plt.tight_layout()
    
    for i, ax in enumerate(axes.flat):
        train_losses, valid_losses = loaded_experiments[i]["train_losses"], loaded_experiments[i]["valid_losses"]
        
        ax.plot(train_losses, label=f"TRL, min={np.min(train_losses):.3f}")
        ax.plot(valid_losses, label=f"VAL, min={np.min(valid_losses):.3f} at step={np.argmin(valid_losses)}")
        ax.set_xlabel("Epochs")
        ax.set_ylabel("Loss")
        ax.set_title(f"{n1}={loaded_experiments[i][n1]}, {n2}={loaded_experiments[i][n2]}")
        ax.legend(loc="upper right")
    
    plt.show()

Get Experiment Results¶

In [11]:
def get_experiment_results(loaded_experiments, test_hyperparam_names, extra_loader):
    experiment_results = []
    n1, n2 = test_hyperparam_names
    for i, exp in enumerate(loaded_experiments):
        pred_scores, true_labels, pred_labels = get_predictions(exp['model_state_dict'], extra_loader)
        experiment_results.append({
            n1: exp[n1],
            n2: exp[n2],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })

        print(f"First 10 true labels:")
        [print(num, end=" ") for num in true_labels[:10]]
        print(f"...\n")

        print(f"First 10 pred labels:")
        [print(num, end=" ") for num in pred_labels[:10]]
        print(f"...\n")

        print(f"First 5 pred_scores:")
        [print(num, end=" ") for num in pred_scores[:5]]
        print(f"...\n")

        # del pred_scores, true_labels, pred_lables
        torch.cuda.empty_cache()
    return experiment_results

Confusion Matrix¶

In [12]:
def plot_cm(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names
    
    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_labels = exp_rs['true_labels'], exp_rs['pred_labels']
        cm = confusion_matrix(true_labels, pred_labels)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=range(0,10))
        disp.plot(ax=axes[i], cmap = plt.cm.Blues)
        axes[i].set_title(f"Exp {i+1}: {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
    
    plt.tight_layout()
    plt.show()

Precision-Recall Curve¶

In [13]:
def plot_pr(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    fig, axes = plt.subplots(n_rows,n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names
    accuracies = []
    f1_scores = []
    
    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_labels'], exp_rs['pred_scores']
        true_labels_bin, pred_labels_bin = label_binarize(true_labels, classes=range(0,10)), label_binarize(pred_labels, classes=range(0,10))
        
        accuracy, precision, recall, f1 = get_metrics(true_labels, pred_labels)
        accuracies.append(accuracy)
        f1_scores.append(f1)
        
        for j in range(0, 10):
            # print(f"Class {j}: Prec:{precision[j]:.2f}, Recall:{recall[j]:.2f}, F_1 Score:{f1[j]:.2f}")
            precision_i, recall_i, _ = precision_recall_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
    
            average_precision = average_precision_score(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            axes[i].step(recall_i, precision_i, where="post", label=f"Class {j} AP={average_precision:.2f}")
            axes[i].set_title(f"PR-Curve {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend()
        axes[i].set_xlabel("Recall")
        axes[i].set_ylabel("Precision")
    
    # for j in range(i+1, 16):
    #     fig.delaxes(axes[j])
    
    plt.tight_layout()
    plt.show()
    return accuracies, f1_scores

ROC-AUC Curve¶

In [14]:
def plot_rocauc(experiment_results, hyper_param_names, curve_type, n_rows=4, n_cols=4):
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names

    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_scores']
        true_labels_bin = label_binarize(true_labels, classes=range(0, 10))

        # All Classes' ROC curve & ROC Area Under Curve
        fpr = dict()
        tpr = dict()
        roc_auc = dict()

        for j in range(10):
            fpr[j], tpr[j], _ = roc_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            roc_auc[j] = auc(fpr[j], tpr[j])

        # Macro-Average ROC & ROC-AUC
        all_fpr = np.unique(np.concatenate([fpr[j] for j in range(10)]))
        mean_tpr = np.zeros_like(all_fpr)
        for j in range(10):
            mean_tpr += np.interp(all_fpr, fpr[j], tpr[j])
        mean_tpr /= 10

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(true_labels_bin.ravel(), np.array(pred_scores).ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        # Plot only Macro or Micro ROC curves
        if curve_type == "macro_micro":
            axes[i].plot(fpr["macro"], tpr["macro"], label=f"Macro (AUC={roc_auc['macro']:.2f})")
            axes[i].plot(fpr["micro"], tpr["micro"], label=f"Micro (AUC={roc_auc['micro']:.2f})")
        elif curve_type == "all":
            # Plot all ROC curves
            for j in range(10):
                axes[i].plot(fpr[j], tpr[j], label=f"Class {j} (AUC={roc_auc[j]:.2f})")

        axes[i].plot([0, 1], [0, 1], "k--")
        axes[i].set_xlabel("False Positive Rate")
        axes[i].set_ylabel("True Positive Rate")
        axes[i].set_title(f"ROC Curve {i+1}, {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend(loc='lower right')

    plt.tight_layout()
    plt.show()

3.0.1 Datasets¶

In [15]:
def split_train_valid(train_dataset, train_ratio):
    ori_len = len(train_dataset)
    train_size = int(train_ratio * ori_len)
    valid_size = ori_len - train_size

    # These are subsets!! Don't directly use them or you will spend 2 hours solving for it.
    train_subset, valid_subset = random_split(train_dataset, [train_size, valid_size])

    # Re-construct two SVHNDataset object from indecies    
    train_dataset_ = train_dataset.overwrite(indices=train_subset.indices)
    valid_dataset_ = train_dataset.overwrite(indices=valid_subset.indices)
    
    return train_dataset_, valid_dataset_

3.1 Experiment 1: Optimizer¶

In the standard process of gradient descent, each update is proportional to the negative gradient (first-order derivative) of the loss function with respect to the parameter. In this traditional process, the learning rate is fixed, and it may cause problems.

  • Oscillations. If locally, the learning rate is too high, the model will jump around the local minimum.
  • Slow convergence. If locally, the learning rate is too low, the model will spend a lot of epochs to converge to a local minimum.

To solve this problem, we enable the learning rate to be adaptive by introducing the "momentum", a velocity-like term which accumulates past gradients in the direction of consistent descent.

  • The velocity term is the weighted sum of previous gradients.
  • ...such that the update direction does not only rely on the current gradient, but also on previous ones.

The update of velocity is represnted as: $$ v_t=\beta v_{t-1} + (1-\beta)\cdot\nabla J(\theta) $$ where $\beta$ is the momentum coefficient. In our experiments, $\beta$ will be fixed to $0.9$.

The update of parameters will be: $$ \theta_{t} = \theta_{t-1}-\eta\cdot v_{t} $$ In this experiment, we focus on the performance of different optimizers, each has its own optimized way to update the momentum. We will fix other variables, including transform, epoch number and learning rate, and only adjust the optimizers. There are a few optimizers to be chozen:

  • Adaptive Moment Estimation (Adam)
  • Stochastic Gradient Descent (SGD)
  • Root Mean Square Propagation (RMSprop)
  • Adam with Weight Decay (AdamW)
  • Adaptive Gradient Algorithm (Adgrad)
  • SGD with Momentum and Nesterove Accelerated Gradient
In [34]:
# Universal Train Dataset without splitting
exp1_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))
exp1_mean, exp1_std = exp1_universal_train_dataset.get_meanstd()

print(f"Channel Means: {exp1_mean}")
print(f"Channel Stds: {exp1_std}")
Channel Means: [0.4376845359802246, 0.4437684714794159, 0.47280389070510864]
Channel Stds: [0.19803018867969513, 0.2010156661272049, 0.19703581929206848]

Define changing & non-changing hyper parameters.

In [35]:
exp1_hyperparams = {
    "num_epochs": 25,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp1_mean, std=exp1_std),
        ToTensorV2()
    ])
}

exp1_models = [SmallVGG().to(device) for _ in range(0,6)]

candidate_optimizers = [
    optim.Adam(exp1_models[0].parameters(), lr=exp1_hyperparams['lr']), 
    optim.SGD(exp1_models[1].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9),
    optim.RMSprop(exp1_models[2].parameters(), lr=exp1_hyperparams['lr']),
    optim.AdamW(exp1_models[3].parameters(), lr=exp1_hyperparams['lr'], weight_decay=0.01),
    optim.Adagrad(exp1_models[4].parameters(), lr=exp1_hyperparams['lr']),
    optim.SGD(exp1_models[5].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9, nesterov=True)]

Train, Validation and Test datasets.

In [36]:
# Train & Test Dataset
exp1_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"), transform=exp1_hyperparams['transform'])
exp1_train_dataset, exp1_valid_dataset = split_train_valid(exp1_train_dataset, train_ratio=0.8)

# Test Dataset
exp1_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"test_32x32.mat"), transform=exp1_hyperparams['transform'])

print(f"Train Size:{exp1_train_dataset.__len__()}\nValidation Size:{exp1_valid_dataset.__len__()}\nTest Size:{exp1_test_dataset.__len__()}")
Train Size:58605
Validation Size:14652
Test Size:26032

Train, Validation and Test Data Loaders.

In [37]:
# Data Loaders
exp1_train_loader = DataLoader(exp1_train_dataset, batch_size=128, shuffle=True)
exp1_valid_loader = DataLoader(exp1_valid_dataset, batch_size=128, shuffle=True)
exp1_test_loader = DataLoader(exp1_test_dataset, batch_size=128, shuffle=False)

Run Experiments

In [38]:
def run_exp1(optimizers, models, hyper_params, train_loader, valid_loader):
    experiments = []
    for i, [optimizer, exp1_model] in enumerate(zip(optimizers, models)):
        print(f"Experiment {i+1}. Running experiment on optimizer: {optimizer.__class__.__name__}")

        criterion = hyper_params['criterion']
        num_epochs = hyper_params['num_epochs']
        train_losses, valid_losses = train_and_evaluate(exp1_model, train_loader, valid_loader, criterion, optimizer, num_epochs)

        experiments.append({
            "optimizer": optimizer.__class__.__name__,
            "others":"same",
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": exp1_model.state_dict()
        })

        del exp1_model, criterion, optimizer
        torch.cuda.empty_cache()

    return experiments
In [ ]:
exp1 = run_exp1(candidate_optimizers, exp1_models, exp1_hyperparams, exp1_train_loader, exp1_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp1, f"./models/exp1_{time_str}.pth")

Load Experiments

Load Experiment objects and plot results.

In [ ]:
exp1_loaded = torch.load("./models/exp1_17303963644961627.pth")
exp1_results = get_experiment_results(exp1_loaded, test_hyperparam_names=["optimizer", "others"], extra_loader=exp1_test_loader)
In [41]:
plot_el(exp1_loaded, ["optimizer", "others"], n_rows=1, n_cols=6)
No description has been provided for this image
In [42]:
plot_cm(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
No description has been provided for this image

Precision-Recall Curve

Overfitted to inspect training performance. This "Best" does not tell the optimum optimizer.

In [43]:
exp1_accuracies, exp1_f1s = plot_pr(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
print_metrics(exp1_accuracies, exp1_f1s)
No description has been provided for this image
Accuracies:
0.907 0.196 0.910 0.908 0.791 0.196 

F1 Score Lists:
0.910 0.941 0.933 0.863 0.927 0.911 0.883 0.899 0.859 0.861 | Avg F1=0.899, Std F1=0.029276089747144043
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.910 0.937 0.939 0.877 0.909 0.915 0.897 0.922 0.860 0.861 | Avg F1=0.903, Std F1=0.027107602833846337
0.909 0.941 0.944 0.865 0.923 0.911 0.891 0.918 0.855 0.833 | Avg F1=0.899, Std F1=0.035261349926122475
0.767 0.871 0.856 0.744 0.790 0.759 0.750 0.805 0.647 0.689 | Avg F1=0.768, Std F1=0.06501732647978666
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
Best: 3-th
In [44]:
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="all", n_rows=1, n_cols=6)
No description has been provided for this image
In [45]:
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="macro_micro", n_rows=1, n_cols=6)
No description has been provided for this image

3.2 Experiment 2: Epoch Number and Learning Rate¶

This experiment seeks to find the effect of different combinations of epoch numbers and learning rates on the training & testing performance of the neural network.

3.2.1 Experiment 2-1: Rough Search¶

In this sub-experiment, we perform a rough search on the epochs and learning rate. We promoted four possible values for both parameters: $$ \text{candidate epochs}=\{10, 15, 20, 25\} $$ $$ \text{candidate lr}=\{1.0\times 10^{-3},1.0\times 10^{-4},1.0\times 10^{-5},1.0\times 10^{-6}\} $$

In [47]:
# Universal Train Dataset without splitting
exp2_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))
exp2_mean, exp2_std = exp2_universal_train_dataset.get_meanstd()

print(f"Channel Means: {exp2_mean}")
print(f"Channel Stds: {exp2_std}")
Channel Means: [0.4376845359802246, 0.4437684714794159, 0.47280389070510864]
Channel Stds: [0.19803018867969513, 0.2010156661272049, 0.19703581929206848]
In [48]:
exp2_hyperparams = {
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.AdamW,
}

candidate_epochs = [10, 15, 20, 25]
candidate_lr = [1e-3, 1e-4, 1e-5, 1e-6]
In [49]:
torch.cuda.empty_cache()

# Train & Validation Datasets
exp2_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"), transform=exp2_hyperparams['transform'])
exp2_train_dataset, exp2_valid_dataset = split_train_valid(exp2_train_dataset, train_ratio=0.8)

# Test Dataset
exp2_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"test_32x32.mat"), transform=exp2_hyperparams['transform'])

print(f"Train Size:{exp2_train_dataset.__len__()}\nValidation Size:{exp2_valid_dataset.__len__()}\nTest Size:{exp2_test_dataset.__len__()}")
Train Size:58605
Validation Size:14652
Test Size:26032
In [50]:
exp2_train_loader = DataLoader(exp2_train_dataset, batch_size=128, shuffle=True)
exp2_valid_loader = DataLoader(exp2_valid_dataset, batch_size=128, shuffle=True)
exp2_test_loader = DataLoader(exp2_test_dataset, batch_size=128, shuffle=False)
In [51]:
def run_exp2_1(epochs, lr_list, hyper_params, train_loader, valid_loader):
    combinations = list(itertools.product(epochs, lr_list))
    experiments = []
    for i, combo in enumerate(combinations):
        num_epochs, lr = combo

        print(f"Running Exp {i+1}: num_epoch={num_epochs}, lr={lr}")
        this_model = SmallVGG().to(device)
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr, weight_decay=0.01)
        train_losses, valid_losses = train_and_evaluate(this_model, train_loader, valid_loader, criterion, optimizer, num_epochs)

        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
    return experiments
In [ ]:
exp2_1 = run_exp2_1(candidate_epochs, candidate_lr, exp2_hyperparams, exp2_train_loader, exp2_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp2_1, f"./models/exp2-1_{time_str}.pth")
In [ ]:
exp2_1_loaded = torch.load("./models/exp2-1_17304023422640018.pth")
exp2_1_results = get_experiment_results(exp2_1_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)

3.2.1-1 Epoch-Loss Curve¶

We found that the key to the training performance of a model is the learning rate. Epoch number only controls the progress of training.

From the perspective of learning rate (each column), only the learning rate of $1.0\times 10^{-3}$ shows a sign of convergence under each candidate epochs. With this learning rate, the model even overfitted under experiments with an epoch number over $15$. The best model we conclude from this rough selection is the one with the combination of $\text{num\_epoch}=10\land\text{lr}=1.0\times10^{-3}$. The minimum validation loss is $36.648$ at step $7$, which is the lowest of all $16$ samples. However, this doesn't mean that it is optimal since it may jump over a local minimum.

Moreover, as we inspect the performance on smaller learning rates, we found that they tend to converge in a way further epoch steps. Moreover, for the learning rate $1.0\times 10^{-6}$, the learning rate is too low that the model can not even fit under nearly-finite epochs.

In [54]:
plot_el(exp2_1_loaded, ["num_epochs", "lr"], n_rows=4, n_cols=4)
No description has been provided for this image

3.2.1-2 Confusion Matrix¶

In this rough search, the confusion matrix varies on different learning rates, and tends to be identical on different epochs.

Under a same epoch number, as leraning rate gets smaller, the confusion matrix gets "blurrer", meaning that the prediction is less accurate from the whole perspective. The learning rates under $1.0\times 10^{-5}$ are too low that the model can't converge in a reasonably number of epochs. For the lowest learning rate of $1.0\times 10^{-6}$, the model is not fitted at all. It classifies every number into 1, the number with the richest amount in the dataset.

In [55]:
plot_cm(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
No description has been provided for this image

3.2.1-3 Precision-Recall Curve¶

From a numerical perspective over the testing performance, the combination of $\text{num\_epoch}=15\land\text{lr}=1.0\times10^{-3}$ gives the highest accuracy of $0.907$, highest average $F_1$ score of $0.916$ and the lowest $F_1$ variance per-class of $0.025$.

In [59]:
exp2_1_accuracies, exp2_1_f1s = plot_pr(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
print_metrics(exp2_1_accuracies, exp2_1_f1s)
No description has been provided for this image
Accuracies:
0.915 0.836 0.486 0.196 0.914 0.865 0.520 0.196 0.913 0.876 0.589 0.196 0.902 0.880 0.636 0.196 

F1 Score Lists:
0.906 0.940 0.939 0.888 0.924 0.903 0.887 0.929 0.873 0.887 | Avg F1=0.908, Std F1=0.022740785980864705
0.827 0.914 0.886 0.770 0.857 0.805 0.771 0.882 0.717 0.731 | Avg F1=0.816, Std F1=0.06475924928424134
0.346 0.740 0.514 0.354 0.461 0.447 0.228 0.557 0.013 0.041 | Avg F1=0.370, Std F1=0.2152076147547944
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.920 0.942 0.936 0.884 0.932 0.914 0.891 0.913 0.865 0.872 | Avg F1=0.907, Std F1=0.02589773808894772
0.874 0.925 0.913 0.817 0.878 0.835 0.817 0.897 0.770 0.769 | Avg F1=0.849, Std F1=0.05343572312454684
0.356 0.741 0.597 0.330 0.575 0.494 0.377 0.615 0.006 0.213 | Avg F1=0.430, Std F1=0.2071013592550223
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.908 0.941 0.945 0.885 0.929 0.907 0.884 0.929 0.856 0.852 | Avg F1=0.904, Std F1=0.031728876018262464
0.868 0.933 0.924 0.832 0.894 0.857 0.830 0.900 0.787 0.794 | Avg F1=0.862, Std F1=0.04870114698696799
0.464 0.734 0.751 0.441 0.505 0.559 0.523 0.638 0.210 0.434 | Avg F1=0.526, Std F1=0.1507201592342292
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.897 0.934 0.937 0.860 0.918 0.898 0.878 0.914 0.847 0.845 | Avg F1=0.893, Std F1=0.03238960226204535
0.885 0.935 0.917 0.823 0.896 0.860 0.844 0.898 0.824 0.796 | Avg F1=0.868, Std F1=0.04325231411216343
0.603 0.852 0.687 0.504 0.652 0.617 0.518 0.707 0.228 0.292 | Avg F1=0.566, Std F1=0.17967335896170858
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
Best: 1-th
In [60]:
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
In [61]:
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image

3.2.2 Experiment 2-2: Detailed¶

Previous sub-experiment tells that the best combination from all the listed ones is $\text{num\_epoch}=15 \land \text{lr}=1.0\times 10^{-3}$.

This is a rough solution, as it may jump over local minimums. We want to find a better learning rate around $1.0\times 10^{-3}$, with an even more detailed distinction between candidate values, so that it may reveal a missing local minimum without using too much epochs.

We conducted an excessive experiment, purposely seeking an overfitting point over the listed candidate learning rates. We do this by setting the epoch number to $50$.

In [62]:
exp2_2_hyperparams = {
    "num_epoch": 20,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.Adam,
}

# More detailed candidate learning rates around 1e-3, that is 10e-4.
exp2_2_candidate_lr = [17e-4, 15e-4, 15e-4, 14e-4, 13e-4, 12e-4, 8e-4, 7e-4, 6e-4, 5e-4, 4e-4, 3e-4]
In [82]:
def run_exp2_2(lr_list, hyper_params, train_loader, test_loader):
    experiments = []
    for i, lr in enumerate(lr_list):

        print(f"Running Exp {i+1}: lr={lr}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epoch']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr, weight_decay=0.01)
        train_losses, valid_losses = train_and_evaluate(this_model, train_loader, test_loader, criterion, optimizer, num_epochs)

        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
    return experiments
In [ ]:
exp2_2 = run_exp2_2(exp2_2_candidate_lr, exp2_2_hyperparams, exp2_train_loader, exp2_valid_loader)
time_str = str(time.time()).replace(".", "")
torch.save(exp2_2, f"./models/exp2-2_{time_str}.pth")
In [ ]:
exp2_2_loaded = torch.load("./models/exp2-2_17304071314112995.pth")
exp2_2_results = get_experiment_results(exp2_2_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)

3.2.2-1 Epoch-Loss Curve¶

By inspecting the epoch-loss curve, we found that all the experiments are overfitted. This means that $50$ epochs are enough for conducting the detailed search.

From all the detailed searches, the learning rate of $14\times10^{-4}$, that is 1.4e-3, yields the lowest validation loss of $36.259$ at step $5$, which is the overfitting point. We discovered a new local minimum that's been jumped over by learning rate of 1e-3, which previously yield a validation loss of $36.648$.

In [66]:
plot_el(exp2_2_loaded, ["num_epochs", "lr"], n_rows=2, n_cols=6)
No description has been provided for this image

3.2.2-2 Confusion Matrix¶

At a glance, from the perspective of confusion matrix, the testing performance on unknown data is roughly identical.

In [67]:
plot_cm(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=6)
No description has been provided for this image

3.2.2-3 Precision-Recall Curve¶

By inspecting the evaluation metrics, we found our judgement correct. From all the over-fitted model, the model with learning rate of $7.0\times 10^{-4}$ yields the highest accuracy of $0.909$ and the highest average per-class $F_1$ score of $0.903$. Besides, the per-class $F_1$ score is also less variant under the learning rate of $7.0\times 10^{-4}$, with the standard deviation of $0.023$.

In [69]:
exp2_2_accuracies, exp2_2_f1s = plot_pr(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=6)
print_metrics(exp2_2_accuracies, exp2_2_f1s)
No description has been provided for this image
Accuracies:
0.905 0.909 0.905 0.896 0.904 0.908 0.904 0.906 0.900 0.904 0.901 0.892 

F1 Score Lists:
0.904 0.931 0.936 0.879 0.923 0.899 0.865 0.912 0.840 0.881 | Avg F1=0.897, Std F1=0.029019711710490413
0.902 0.939 0.937 0.877 0.929 0.897 0.884 0.917 0.854 0.879 | Avg F1=0.901, Std F1=0.027249619260366488
0.892 0.935 0.933 0.874 0.926 0.916 0.874 0.908 0.836 0.859 | Avg F1=0.895, Std F1=0.032047660610461416
0.895 0.928 0.933 0.856 0.923 0.901 0.872 0.881 0.836 0.841 | Avg F1=0.887, Std F1=0.033520138553783996
0.901 0.936 0.932 0.872 0.926 0.905 0.876 0.909 0.844 0.845 | Avg F1=0.895, Std F1=0.03214838081150217
0.909 0.930 0.935 0.879 0.926 0.911 0.885 0.912 0.863 0.863 | Avg F1=0.901, Std F1=0.025612410587384667
0.904 0.938 0.938 0.866 0.926 0.893 0.881 0.910 0.847 0.837 | Avg F1=0.894, Std F1=0.034195826718722434
0.901 0.934 0.941 0.873 0.922 0.913 0.880 0.903 0.848 0.850 | Avg F1=0.897, Std F1=0.03125728672040046
0.908 0.933 0.928 0.870 0.919 0.892 0.869 0.880 0.855 0.866 | Avg F1=0.892, Std F1=0.02681609191626165
0.907 0.937 0.943 0.870 0.919 0.906 0.872 0.913 0.852 0.819 | Avg F1=0.894, Std F1=0.037298183800018245
0.896 0.937 0.935 0.868 0.912 0.898 0.865 0.910 0.849 0.844 | Avg F1=0.892, Std F1=0.031961893604000764
0.903 0.926 0.912 0.853 0.913 0.889 0.873 0.882 0.841 0.852 | Avg F1=0.884, Std F1=0.02792655014372886
Best: 2-th

3.2.2-4 ROC-AUC Curve¶

The ROC-AUC Curve under all the detailed candidate learning rates are roughly identical.

In [70]:
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="all", n_rows=2, n_cols=6)
No description has been provided for this image
In [71]:
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=2, n_cols=6)
No description has been provided for this image

3.3 Experiment 3: Image Augmentation Parameters¶

3.3.1 Experiment 3-1: Rotation Angles and Crop Ratios¶

In [141]:
exp3_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "train_32x32.mat"))

# The mean & std here will only be used for experiment 3-1.
exp3_1_mean, exp3_1_std = exp3_universal_train_dataset.get_meanstd()

print(f"Channel Means: {exp3_1_mean}\nChannel Stds: {exp3_1_std}")
Channel Means: [0.4376845359802246, 0.4437684714794159, 0.47280389070510864]
Channel Stds: [0.19803018867969513, 0.2010156661272049, 0.19703581929206848]
In [77]:
exp3_1_hyperparams = {
    "num_epochs": 10,
    "lr": 5e-4,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.AdamW,
    "transform": A.Compose([
        A.Normalize(mean=exp1_mean, std=exp1_std),
        ToTensorV2()
    ])
}

# Group 1
candidate_angles = [15, 30, 45, 60]
candidate_crops = [0.08, 0.24, 0.40, 0.60] # Left Boundary
In [142]:
torch.cuda.empty_cache()

# Train & Validation Datasets
exp3_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "train_32x32.mat"))
exp3_train_dataset, exp3_valid_dataset = split_train_valid(exp3_train_dataset, train_ratio=0.8)

# Test Dataset
exp3_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp3_1_hyperparams["transform"])
print(f"Train Size:{exp3_train_dataset.__len__()}\nValidation Size:{exp3_valid_dataset.__len__()}\nTest Size:{exp3_test_dataset.__len__()}")
Train Size:58605
Validation Size:14652
Test Size:26032
In [143]:
exp3_test_loader = DataLoader(exp3_test_dataset, batch_size=128, shuffle=False)
In [130]:
def run_exp3_1(angles, crops, hyper_params, train_dataset, valid_dataset):
    combinations = list(itertools.product(angles, crops))
    experiments = []
    for i, combo in enumerate(combinations):
        angle, crop = combo
        
        print(f"Running Exp {i+1}: angles={angle}, crop={crop}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr, weight_decay=0.01)

        # Define Transform
        this_transform = A.Compose([
            A.RandomResizedCrop(32, 32, scale=(crop, 1.0)),
            A.Rotate(limit=angle),
            A.Normalize(mean=exp3_1_mean, std=exp3_1_std),
            ToTensorV2()
        ])

        # Generate Dataset
        print(f"Exp {i+1}: Generating dataset from transform")
        train_dataset.transform = this_transform
        valid_dataset.transform = this_transform
        
        train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)

        # Train Model
        train_losses, valid_losses = train_and_evaluate(this_model, 
                                                       train_loader, 
                                                       valid_loader, 
                                                       criterion, 
                                                       optimizer,
                                                       num_epochs)

        experiments.append({
            "angle": angle,
            "crop": crop,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
        del train_loader, valid_loader
        
        torch.cuda.empty_cache()

    return experiments
In [ ]:
exp3_1 = run_exp3_1(candidate_angles, candidate_crops, exp3_1_hyperparams, exp3_train_dataset, exp3_valid_dataset)
time_str = str(time.time()).replace(".","")
torch.save(exp3_1, f"./models/exp3-1_{time_str}.pth")
In [ ]:
exp3_1_loaded = torch.load("./models/exp3-1_1730412367135605.pth")
exp3_1_results = get_experiment_results(exp3_1_loaded, test_hyperparam_names=["angle", "crop"], extra_loader=exp3_test_loader)
In [86]:
plot_el(exp3_1_loaded, ["angle", "crop"], n_rows=4, n_cols=4)
No description has been provided for this image
In [87]:
plot_cm(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
No description has been provided for this image
In [88]:
exp3_accuracies, exp3_f1s = plot_pr(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
print_metrics(exp3_accuracies, exp3_f1s)
No description has been provided for this image
Accuracies:
0.861 0.868 0.877 0.903 0.852 0.865 0.881 0.906 0.841 0.873 0.874 0.897 0.858 0.867 0.870 0.897 

F1 Score Lists:
0.841 0.921 0.911 0.797 0.878 0.871 0.827 0.904 0.725 0.797 | Avg F1=0.847, Std F1=0.058953708849960075
0.810 0.920 0.906 0.816 0.911 0.884 0.839 0.917 0.745 0.796 | Avg F1=0.854, Std F1=0.05837212888352291
0.868 0.914 0.910 0.824 0.922 0.889 0.846 0.866 0.800 0.826 | Avg F1=0.867, Std F1=0.04009923059871438
0.882 0.940 0.944 0.872 0.910 0.914 0.869 0.925 0.829 0.825 | Avg F1=0.891, Std F1=0.040588392995837934
0.834 0.912 0.893 0.784 0.899 0.851 0.808 0.875 0.738 0.782 | Avg F1=0.837, Std F1=0.055312248934860725
0.851 0.920 0.911 0.807 0.897 0.855 0.806 0.893 0.763 0.808 | Avg F1=0.851, Std F1=0.050846063403817476
0.873 0.922 0.917 0.821 0.916 0.900 0.861 0.888 0.801 0.799 | Avg F1=0.870, Std F1=0.0453468039810251
0.891 0.930 0.941 0.876 0.926 0.917 0.882 0.905 0.846 0.848 | Avg F1=0.896, Std F1=0.03164409892053704
0.841 0.907 0.888 0.768 0.875 0.845 0.818 0.866 0.671 0.749 | Avg F1=0.823, Std F1=0.0694711993348085
0.847 0.920 0.922 0.833 0.902 0.870 0.832 0.882 0.776 0.787 | Avg F1=0.857, Std F1=0.04882954639950114
0.861 0.914 0.912 0.828 0.899 0.879 0.851 0.868 0.796 0.811 | Avg F1=0.862, Std F1=0.03895370150699186
0.878 0.932 0.928 0.859 0.935 0.903 0.875 0.898 0.845 0.819 | Avg F1=0.887, Std F1=0.03714507797632649
0.811 0.926 0.899 0.800 0.872 0.872 0.824 0.899 0.748 0.759 | Avg F1=0.841, Std F1=0.058551706311618164
0.826 0.926 0.909 0.822 0.892 0.864 0.816 0.902 0.762 0.804 | Avg F1=0.852, Std F1=0.051366126058318876
0.832 0.932 0.900 0.831 0.905 0.865 0.809 0.906 0.766 0.796 | Avg F1=0.854, Std F1=0.052532712015071564
0.883 0.936 0.927 0.855 0.915 0.905 0.875 0.906 0.837 0.827 | Avg F1=0.887, Std F1=0.035692420818319505
Best: 8-th
In [89]:
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
In [90]:
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image

3.3.2 Experiment 3-2: Ratios & Bias¶

In [144]:
exp3_2_hyperparams = {
    "num_epochs": 10,
    "lr": 5e-4,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.AdamW,
    "crop":0.6,
    "angle":30,
    "transform": A.Compose([
        A.Normalize(mean=exp1_mean, std=exp1_std),
        ToTensorV2()
    ])
}

class AddBiasTransform:
    def __init__(self, bias: Union[int, Tuple[int, int]]) -> None:
        if isinstance(bias, tuple):
            self.bias1 = bias[0]
            self.bias2 = bias[1]
        else:
            self.bias1 = 0
            self.bias2 = bias

    def __call__(self, img: np.ndarray) -> np.ndarray:
        _dtype = img.dtype
        bias_value = random.randint(self.bias1, self.bias2)
        img = (img.astype(np.int16) + bias_value) % 256
        return img.astype(_dtype)
        
# Group 2
candidate_ratios = [0.25, 0.42, 0.58, 0.75]
candidate_channel_biases = [0, 32, 64, 128]

Control candidates for different variables

In [149]:
def run_exp3_2(ratios, biases, hyper_params, train_dataset, valid_dataset, universal_train_dataset):
    combinations = list(itertools.product(ratios, biases))
    experiments = []
    for i, combo in enumerate(combinations):
        ratio, bias = combo
        
        print(f"Running Exp {i+1}: ratio={ratio}, bias={bias}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr, weight_decay=0.01)

        # Define Transform
        this_mean, this_std = universal_train_dataset.get_meanstd(bias=bias)
        this_transform = A.Compose([
            A.Lambda(image=lambda img, **kwargs: AddBiasTransform(bias)(img)),  # Lambda customized transform block
            A.RandomResizedCrop(32, 32, scale=(hyper_params['crop'], 1.0), ratio=(ratio, 1.0 / ratio)),
            A.Rotate(limit=hyper_params['angle']),
            A.Normalize(mean=this_mean, std=this_std),
            ToTensorV2()
        ])

        # Generate Dataset
        print(f"Exp {i+1}: Generating dataset from transform")
        train_dataset.transform = this_transform
        valid_dataset.transform = this_transform
        
        train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)

        # Train Model
        train_losses, valid_losses = train_and_evaluate(this_model, 
                                                       train_loader, 
                                                       valid_loader, 
                                                       criterion, 
                                                       optimizer,
                                                       num_epochs)

        experiments.append({
            "ratio": ratio,
            "bias": bias,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
        del train_loader, valid_loader
        
        torch.cuda.empty_cache()

    return experiments
In [ ]:
exp3_2 = run_exp3_2(candidate_ratios, candidate_channel_biases, exp3_2_hyperparams, exp3_train_dataset, exp3_valid_dataset, exp3_universal_train_dataset)
time_str = str(time.time()).replace(".", "")
torch.save(exp3_2, f"./models/exp3-2_{time_str}.pth")
In [ ]:
exp3_2_loaded = torch.load("./models/exp3-2_17304509951687949.pth")
exp3_2_results = get_experiment_results(exp3_2_loaded, test_hyperparam_names=["ratio", "bias"], extra_loader=exp3_test_loader)
In [153]:
plot_el(exp3_2_loaded, ["ratio", "bias"], n_rows=4, n_cols=4)
No description has been provided for this image
In [155]:
plot_cm(exp3_2_results, ["ratio", "bias"], n_rows=4, n_cols=4)
No description has been provided for this image
In [156]:
exp3_2_accuracies, exp3_2_f1s = plot_pr(exp3_2_results, ["ratio", "bias"], n_rows=4, n_cols=4)
print_metrics(exp3_2_accuracies, exp3_2_f1s)
No description has been provided for this image
Accuracies:
0.888 0.889 0.861 0.757 0.889 0.870 0.857 0.815 0.868 0.874 0.852 0.853 0.878 0.878 0.829 0.799 

F1 Score Lists:
0.876 0.916 0.932 0.864 0.911 0.892 0.852 0.860 0.821 0.847 | Avg F1=0.877, Std F1=0.03321812117840835
0.894 0.929 0.924 0.843 0.914 0.892 0.858 0.893 0.805 0.831 | Avg F1=0.878, Std F1=0.03979292054828506
0.838 0.901 0.898 0.811 0.883 0.869 0.820 0.870 0.767 0.835 | Avg F1=0.849, Std F1=0.04051631612264006
0.790 0.756 0.826 0.692 0.710 0.778 0.758 0.814 0.692 0.707 | Avg F1=0.752, Std F1=0.04753679473121518
0.883 0.925 0.929 0.849 0.918 0.892 0.864 0.875 0.803 0.828 | Avg F1=0.877, Std F1=0.0396059755138224
0.864 0.912 0.918 0.823 0.881 0.871 0.836 0.872 0.781 0.807 | Avg F1=0.857, Std F1=0.041990455912870366
0.866 0.876 0.911 0.813 0.872 0.872 0.840 0.836 0.768 0.816 | Avg F1=0.847, Std F1=0.03901193467163097
0.779 0.880 0.875 0.739 0.836 0.811 0.780 0.830 0.715 0.742 | Avg F1=0.799, Std F1=0.05436474612085296
0.876 0.910 0.913 0.815 0.891 0.874 0.825 0.874 0.773 0.805 | Avg F1=0.856, Std F1=0.04531640669527025
0.875 0.910 0.918 0.812 0.898 0.880 0.857 0.854 0.804 0.832 | Avg F1=0.864, Std F1=0.03745148690354468
0.852 0.893 0.901 0.782 0.875 0.859 0.830 0.860 0.764 0.791 | Avg F1=0.841, Std F1=0.04502402786643079
0.836 0.896 0.917 0.781 0.836 0.839 0.845 0.872 0.788 0.810 | Avg F1=0.842, Std F1=0.0414119020757317
0.866 0.904 0.927 0.846 0.904 0.886 0.852 0.857 0.809 0.804 | Avg F1=0.865, Std F1=0.038508310961775434
0.830 0.905 0.924 0.853 0.894 0.882 0.855 0.899 0.810 0.825 | Avg F1=0.868, Std F1=0.03653449685505532
0.802 0.875 0.887 0.775 0.845 0.824 0.796 0.826 0.729 0.775 | Avg F1=0.814, Std F1=0.04569527257505044
0.750 0.868 0.872 0.738 0.835 0.804 0.747 0.812 0.654 0.701 | Avg F1=0.778, Std F1=0.06818637211280652
Best: 2-th
In [157]:
plot_rocauc(exp3_2_results, ["ratio", "bias"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
In [158]:
plot_rocauc(exp3_2_results, ["ratio", "bias"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image
In [ ]:
candidate_epoch_num = [20, 40, 60, 80]
candidate_lr = [1e-3, 1e-4, 1e-5, 1e-6]

From the controlled variables, generate all the possible experiment set.

In [ ]:
combinations = list(itertools.product(candidate_epoch_num, candidate_lr))
for combo in combinations:
    print(f"[{combo[0]}, {combo[1]:.0e}]", end=" ")